Load the data

# Read the raw CDC provisional overdose-death counts and normalise the
# column names to snake_case with janitor.
drug_df = janitor::clean_names(
  read_csv("data/VSRR_Provisional_Drug_Overdose_Death_Counts.csv")
)
## Rows: 42180 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (8): State, Month, Period, Indicator, Percent Complete, State Name, Foot...
## dbl (4): Year, Data Value, Percent Pending Investigation, Predicted Value
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

Summarize by state

# State-level rows for the drug-specific indicators only: the three
# aggregate indicators and the national "US" rows are removed, and a
# missing predicted_value is replaced by 0.
state_clean_df = 
  drug_df %>% 
  select(state, year, month, indicator, predicted_value) %>% 
  filter(
    !indicator %in% c("Number of Deaths", "Number of Drug Overdose Deaths", "Percent with drugs specified"),
    state != "US"
  ) %>% 
  mutate(predicted_value = coalesce(predicted_value, 0))

by years and states

based on predicted value

# Sum of predicted_value for every (year, state) pair; the result stays
# grouped by year, as summarize() drops only the last grouping level.
ys_df = 
  summarize(
    group_by(state_clean_df, year, state),
    total_num = sum(predicted_value)
  )
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

for the whole country

# National ("US") rows for the same drug-specific indicators that
# state_clean_df keeps for the states.
# Built from drug_df rather than state_clean_df: state_clean_df has already
# dropped every "US" row, so filtering it for state == "US" always returns
# an empty table.
# The trailing view() call was removed from the assignment; run
# view(us_df) interactively to inspect the result.
us_df = 
  drug_df %>% 
  filter(!(indicator %in% c("Number of Deaths", "Number of Drug Overdose Deaths", "Percent with drugs specified"))) %>% 
  select(state, year, month, indicator, predicted_value) %>% 
  filter(state == "US") %>% 
  mutate(
    predicted_value = replace_na(predicted_value, 0)
  )

Overview over time (not broken down by drug-specific indicator)

based on the values the CDC provided

# Per-state, per-year sum of the CDC-reported data_value for the two
# aggregate indicators; the result stays grouped by state and year.
# `%in%` is required here: `indicator == c(a, b)` recycles the two strings
# down the column, so a row is kept only when its position happens to line
# up with the matching string, and other matching rows are silently dropped.
overview = 
  drug_df %>% 
  filter(indicator %in% c("Number of Deaths", "Number of Drug Overdose Deaths")) %>% 
  select("state", "year", "month", "indicator", "data_value") %>% 
  filter(!(state == "US")) %>% 
  group_by(state, year, indicator) %>% 
  summarize(data_value = sum(data_value))
## `summarise()` has grouped output by 'state', 'year'. You can override using the `.groups` argument.
# "Number of Deaths" rows with each state's rank within its year
# (1 = largest data_value).
# overview is still grouped by state and year, which leaves one row per
# group after the filter and makes min_rank() return 1 everywhere, so the
# data is regrouped by year before ranking and ungrouped afterwards.
# NOTE(review): ranking states within each year is the assumed intent.
overview_death = 
  overview %>% 
  filter(indicator == "Number of Deaths") %>% 
  group_by(year) %>% 
  mutate(
    death_count = min_rank(desc(data_value))
  ) %>% 
  ungroup()

# Subset of overview holding only the overall drug-overdose death counts.
overview_od = filter(
  overview,
  indicator == "Number of Drug Overdose Deaths"
)


# Set the session-wide default theme as a statement of its own.
# theme_set() returns the *previous* default theme, so adding its result to
# a plot with `+` applies the old theme to that plot instead of the new one.
theme_set(theme_minimal() + theme(legend.position = "bottom"))

# Overdose-death totals per state, one facet per year; x-axis labels are
# rotated so the state codes stay readable.
od_overview_plot = 
  overview_od %>% 
  ggplot(aes(x = state, y = data_value, color = state)) +
  geom_point() +
  facet_grid(~year) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.4, hjust = 1)
  )

ggplotly(od_overview_plot)
##2015 CA, 2016 FL, 2017 OH, 2018 FL, 2020 CA, 2021 CA

Trend across years:

# Smoothed trend of the per-state yearly overdose totals across years.
# The theme is added to the plot directly: `+ theme_set(...)` would add
# whatever theme was the default *before* the call, because theme_set()
# returns the previous theme.
trend_year_plot =
  overview_od %>% 
  ggplot(aes(x = year, y = data_value)) +
  geom_smooth(se = FALSE) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.4, hjust = 1)
  )
trend_year_plot
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Overview by drug type and state

# Sum of predicted_value for every (state, indicator) pair; the result
# stays grouped by state.
overview_drug = 
  summarize(
    group_by(state_clean_df, state, indicator),
    predicted_value = sum(predicted_value)
  )
## `summarise()` has grouped output by 'state'. You can override using the `.groups` argument.
# Predicted totals per indicator, one point per state.
# The theme is added to the plot directly: `+ theme_set(...)` would add
# whatever theme was the default *before* the call, because theme_set()
# returns the previous theme.
drug_overview_plot = 
  overview_drug %>% 
  ggplot(aes(x = indicator, y = predicted_value, color = state)) +
  geom_point() +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.4, hjust = 1)
  )

ggplotly(drug_overview_plot)

Overview by drug type for the US

# Sum of predicted_value per indicator across all rows of state_clean_df.
overview_drug_us = 
  summarize(
    group_by(state_clean_df, indicator),
    predicted_value = sum(predicted_value)
  )

# Predicted totals per indicator, summed over all states.
# The theme is added to the plot directly: `+ theme_set(...)` would add
# whatever theme was the default *before* the call, because theme_set()
# returns the previous theme.
drug_overview_us = 
  overview_drug_us %>% 
  ggplot(aes(x = indicator, y = predicted_value)) +
  geom_point() +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.4, hjust = 1)
  )

ggplotly(drug_overview_us)